/******************************************************************************
This file cleans the baseline test score data.
--------------------------------------------------------------------------------
Input:			Raw 3rd - 8th grade standardized test data sets
					> {yyyy}-{yy}_Student_test-Biog_All_G38_NYC_Scrambled.dta

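Output:			Long student x year x subject file, plus one wide file per grade (6, 7, 8)
					> "${cleandata}baseline_scores_standardized_all_appended.dta"
					> "${cleandata}post_scores_standardized_grade_ms_{6,7,8}.dta"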
--------------------------------------------------------------------------------*/

	clear all
	local append 1

// Stack every yearly raw file, then reshape to one row per
// student x exam year x subject and add a "total" row per student x year.
// The whole rebuild is guarded by the append switch set above.
if `append' == 1 {

	// Seed the accumulator with a dataset that has no observations and no
	// variables. Seeding with a one-row placeholder would leave a phantom
	// student (empty ID, missing exam year) in the saved output.
	// The tempfile gets its own name so it does not overwrite the append switch.
	tempfile stacked
	sa `stacked', emptyok

	// Loop over fall years; each raw file is named yyyy-yy (e.g. 2005-06)
	forval yr = 2005/2018 {

		// Two-digit, zero-padded spring year used in the file name
		local digits_plus = string(`yr' - 2000 + 1, "%02.0f")
		local next_year = `yr' + 1

		// Load raw file
		use "${cleandata}outcomes/TestBiog/`yr'-`digits_plus'_Student_test-Biog_All_G38_NYC_Scrambled", clear

		// Convert grade / performance level / score fields to numeric.
		// force turns any non-numeric text into missing.
		destring *grade *level *score, replace force

		* Duplicate record in the 2017 file: drop the level-2 math copy
		if `yr' == 2017 {
			drop if student_id_scram == "345022136" & math_perf_level == 2
		}

		// Fail here if this year's file lacks the grade or scale-score fields
		// needed by the reshape below (raw scores are not checked per year)
		confirm numeric variable *_test_grade *_scale_score

		// Generate spring year indicator
		gen bl_exam_year = `next_year'

		// Add everything accumulated so far below the current year and store it
		append using `stacked'
		sa `stacked', replace

	}

	// Full variable names: do not depend on variable-name abbreviation
	keep student_id_scram bl_exam_year *_raw_score *_test_grade *_scale_score

	// Reshape long by test x student x year. The subject prefix (whatever
	// precedes _raw_score etc.) becomes the string variable subject.
	reshape long @_raw_score @_test_grade @_scale_score, i(student_id_scram bl_exam_year) j(subject) string

	// The reshaped stubs keep their leading underscore; strip it
	ren _* *

	* Harmonize variable names
	ren student_id_scram stu

	// Build one "total" row per student x year.
	// NOTE(review): collapse (sum) counts missing scores as 0, so a student
	// with a missing subject still gets a total, and (first) can pick up a
	// missing test_grade -- confirm this is intended.
	preserve
		collapse (first) test_grade (sum) scale_score raw_score, by(stu bl_exam_year)
		g subject = "total"
		tempfile total
		sa `total'
	restore
	append using `total'
	sort stu bl_exam_year subject

	sa "${cleandata}baseline_scores_standardized_all_appended.dta", replace

}

********************************************************************
******** Build one post-score file per test grade (6, 7, 8)
********************************************************************

// Start from the long student x exam year x subject file
use "${cleandata}baseline_scores_standardized_all_appended.dta", clear

// One pass per grade; preserve/restore brings the full long file back
// after each grade-specific file has been written.
forvalues ms_grade = 6/8 {
	preserve

		rename bl_exam_year post_exam_year

		// Restrict to this grade's tests; through 2017, rows without a raw
		// score are discarded (later years are kept regardless)
		keep if test_grade == `ms_grade'
		keep if !(raw_score == . & post_exam_year <= 2017)

		// Within student x subject, retain only the earliest exam year
		bysort stu subject: egen first_year = min(post_exam_year)
		keep if post_exam_year == first_year
		drop first_year

		// z-score the scale score within subject x exam year x grade
		bysort subject post_exam_year test_grade: egen mu = mean(scale_score)
		bysort subject post_exam_year test_grade: egen sigma = sd(scale_score)
		generate post_ss = (scale_score - mu) / sigma
		drop mu sigma

		// Prefix so the wide variables come out as <stub>_<subject>
		replace subject = "_" + subject

		// One row per student, one set of columns per subject
		reshape wide post_exam_year test_grade raw_score post_ss scale_score, i(stu) j(subject) string

		// Rebuild the total columns from the math and ela columns.
		// NOTE(review): raw_score_total is not in the drop list, so whatever
		// the reshape produced for it is kept as is -- confirm intended.
		capture drop post_exam_year_total test_grade_total post_ss_total scale_score_total
		generate post_exam_year_total = post_exam_year_math
		generate test_grade_total = test_grade_math
		generate scale_score_total = scale_score_math + scale_score_ela

		// z-score the combined score within exam year x grade
		bysort post_exam_year_total test_grade_total: egen tot_mu = mean(scale_score_total)
		bysort post_exam_year_total test_grade_total: egen tot_sigma = sd(scale_score_total)
		generate post_ss_total = (scale_score_total - tot_mu) / tot_sigma
		drop tot_mu tot_sigma

		save "${cleandata}post_scores_standardized_grade_ms_`ms_grade'.dta", replace

	restore
}
